In [1]:
import os
import numpy as np
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
# Root directory that is scanned for per-person sub-directories of notebooks.
FINAL_DIR = '../testbed/Final'

# os.listdir returns entries in arbitrary order. Sort them so that positional
# lookups such as notebook_objs[0] pick the same notebook on every run.
people = sorted(os.listdir(FINAL_DIR))

# Collect the path of every .ipynb file found directly inside each sub-directory.
notebooks = []
for person in people:
    person = os.path.join(FINAL_DIR, person)
    if os.path.isdir(person):
        direc = sorted(os.listdir(person))
        notebooks.extend(
            os.path.join(person, filename)
            for filename in direc
            if filename.endswith('.ipynb')
        )

# Build one NotebookMiner per notebook path, in the same order as `notebooks`.
notebook_objs = []
for nb in notebooks:
    nb_obj = NotebookMiner(nb)
    notebook_objs.append(nb_obj)
    # NOTE(review): write_to_file receives the notebook's own source path --
    # confirm this is intended and does not clobber the input notebook.
    nb_obj.write_to_file(nb)
In [3]:
# Take the first loaded notebook and fetch all of its cells.
first_notebook = notebook_objs[0]
ac = first_notebook.get_all_cells()
In [6]:
# Walk the cells once: keep the source of Python cells, echo Markdown cells.
list_of_source = []
for cell in ac:
    # The two checks are independent (not if/else), exactly as before.
    if cell.is_python():
        list_of_source += [cell.get_source()]
    if cell.is_markdown():
        markdown_text = cell.get_source()
        print(markdown_text)
In [7]:
# Show the source of the first collected Python cell.
list_of_source[0]
Out[7]:
In [2]:
# Display the first NotebookMiner object as the cell's output.
notebook_objs[0]
Out[2]:
Then, let's gather all the notebook objects into a single summary object and get the summary arrays for the whole group of notebooks.
In [2]:
# Wrap every loaded notebook in one MultipleSummary.
multiple = MultipleSummary(notebook_objs)

# all_full_summaries() yields a pair, unpacked here into `header` and `val`.
full_summaries = multiple.all_full_summaries()
header, val = full_summaries
In [12]:
# Display the shape of the summary values returned by all_full_summaries().
val.shape
Out[12]:
In [4]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
In [37]:
# Make figures large enough for a 3x3 grid of histograms.
plt.rcParams['figure.figsize'] = (20, 20)
fig, axes = plt.subplots(3, 3)

# axes.flat walks the 3x3 grid row by row. Columns 1..9 of `val` are plotted
# (column 0 is skipped), each titled with the matching `header` entry.
for column, ax in enumerate(axes.flat, start=1):
    ax.hist(val[:, column].astype(float), bins=20)
    ax.set_title(header[column])